---
title: "Yona_EJ_GHGs_Dec2024"
output: html_document
date: "2024-12-13"
---

# Author’s Note: ChatGPT4 was used to debug this R code

# ---- Load the four overlay tables exported as CSV ----
PrincipalPortsIntersect <- read.csv(
  "PrincipalPorts_Justice40_Intersect_v1_Table.csv"
)
PrincipalPortsOneMile <- read.csv(
  "PrincipalPorts_Justice40_OneMile_v1_Table.csv"
)
AirportsIntersect <- read.csv("Airports_Justice40_Intersect_v1_Table.csv")
AirportsOneMile <- read.csv("Airports_Justice40_OneMile_v1_Table.csv")

# ---- Narrow the airport tables and align their naming column ----

# Keep only rows whose facility type is "AIRPORT" and that carry a "Y" in
# either customs column, then duplicate `fac_name` into `PORT_NAME` so the
# airport tables expose the same name column the metric functions read.
keep_international_airports <- function(airports) {
  international <- subset(
    airports,
    fac_type == "AIRPORT" & (intl_custo == "Y" | intl_cus_1 == "Y")
  )
  international$PORT_NAME <- international$fac_name
  international
}

SubsetAirportsIntersect <- keep_international_airports(AirportsIntersect)
SubsetAirportsOneMile <- keep_international_airports(AirportsOneMile)


#' Print disadvantaged-community metrics for one overlay table
#'
#' A row is flagged when `SN_C == 1` or `SN_T == 1`. A missing value in either
#' flag column counts as "not flagged" unless the other column equals 1, so
#' missing flags can neither turn the row count into `NA` nor add a spurious
#' `NA` entry to the set of flagged port names.
#'
#' Three flagged/total pairs and their ratios are written to the console:
#' distinct `PORT_NAME` values, row counts, and sums of `TPF` (missing `TPF`
#' values are skipped). A ratio is `NaN` when its denominator is zero.
#'
#' @param df Data frame with columns `PORT_NAME`, `SN_C`, `SN_T` and `TPF`.
#' @return `NULL`, invisibly. Called for its console output.
calculate_metrics_and_print <- function(df) {
  # Build the flag mask once. `NA | TRUE` is TRUE, so a row with one missing
  # flag is still flagged when the other flag is 1; every remaining NA means
  # "no evidence of a flag" and is set to FALSE.
  is_flagged <- df$SN_C == 1 | df$SN_T == 1
  is_flagged[is.na(is_flagged)] <- FALSE

  # Distinct port names: flagged vs. all
  unique_port_name_sn_c_1 <- length(unique(df$PORT_NAME[is_flagged]))
  total_unique_port_name <- length(unique(df$PORT_NAME))
  fraction_port_name <- unique_port_name_sn_c_1 / total_unique_port_name

  # Row counts: flagged vs. all
  count_geoid10_sn_c_1 <- sum(is_flagged)
  total_count_geoid10 <- nrow(df)
  fraction_geoid10 <- count_geoid10_sn_c_1 / total_count_geoid10

  # TPF totals: flagged vs. all
  sum_tpf_sn_c_1 <- sum(df$TPF[is_flagged], na.rm = TRUE)
  total_sum_tpf <- sum(df$TPF, na.rm = TRUE)
  fraction_tpf <- sum_tpf_sn_c_1 / total_sum_tpf

  # Print results
  cat("Results:\n")
  cat("Number of ports in or near disadvantaged communities: ", unique_port_name_sn_c_1, "\n")
  cat("Number of ports in or near all communities: ", total_unique_port_name, "\n")
  cat("Fraction of ports in or near disadvantaged communities: ", fraction_port_name, "\n")
  cat("Number of disadvantaged community census tracts that contain or are near a port: ", count_geoid10_sn_c_1, "\n")
  cat("Number of census tracts that contain or are near a port: ", total_count_geoid10, "\n")
  cat("Fraction of disadvantaged community census tracts that contain or are near a port: ", fraction_geoid10, "\n")
  cat("Total number of people in disadvantaged community census tracts that contain or are near a port: ", sum_tpf_sn_c_1, "\n")
  cat("Total number of people in census tracts that contain or are near a port: ", total_sum_tpf, "\n")
  cat("Fraction of people in disadvantaged community census tracts that contain or are near a port: ", fraction_tpf, "\n")

  invisible(NULL)
}

# Print the metrics for every dataset, in the same order as before:
# seaports (intersect, one mile), then airports (intersect, one mile).
# The per-call return values are discarded; only the console output matters.
invisible(lapply(
  list(
    PrincipalPortsIntersect,
    PrincipalPortsOneMile,
    SubsetAirportsIntersect,
    SubsetAirportsOneMile
  ),
  calculate_metrics_and_print
))


#make everything fit nicely into a table
#' Summarise disadvantaged-community metrics as a one-row data frame
#'
#' A row is flagged when `SN_C == 1` or `SN_T == 1`. A missing value in either
#' flag column counts as "not flagged" unless the other column equals 1, so
#' missing flags can neither turn the counts into `NA` nor add a spurious `NA`
#' entry to the set of flagged port names.
#'
#' @param df Data frame with columns `PORT_NAME`, `SN_C`, `SN_T` and `TPF`.
#' @param name Label stored in the `DataSet` column of the result.
#' @return A one-row data frame with columns `DataSet`, `UniquePorts`,
#'   `TotalPorts`, `FractionPorts`, `GEOIDCount`, `TotalGEOID`,
#'   `FractionGEOID`, `SumTPFSN`, `TotalTPF` and `FractionTPF`. Each fraction
#'   is flagged / total and is `NaN` when the total is zero.
calculate_metrics_df <- function(df, name) {
  # Build the flag mask once. `NA | TRUE` is TRUE, so a row with one missing
  # flag is still flagged when the other flag is 1; every remaining NA means
  # "no evidence of a flag" and is set to FALSE.
  is_flagged <- df$SN_C == 1 | df$SN_T == 1
  is_flagged[is.na(is_flagged)] <- FALSE

  # Distinct port names: flagged vs. all
  unique_port_name_sn_c_1 <- length(unique(df$PORT_NAME[is_flagged]))
  total_unique_port_name <- length(unique(df$PORT_NAME))

  # Row counts: flagged vs. all
  count_geoid10_sn_c_1 <- sum(is_flagged)
  total_count_geoid10 <- nrow(df)

  # TPF totals: flagged vs. all (missing TPF values are skipped)
  sum_tpf_sn_c_1 <- sum(df$TPF[is_flagged], na.rm = TRUE)
  total_sum_tpf <- sum(df$TPF, na.rm = TRUE)

  data.frame(
    DataSet = name,
    UniquePorts = unique_port_name_sn_c_1,
    TotalPorts = total_unique_port_name,
    FractionPorts = unique_port_name_sn_c_1 / total_unique_port_name,
    GEOIDCount = count_geoid10_sn_c_1,
    TotalGEOID = total_count_geoid10,
    FractionGEOID = count_geoid10_sn_c_1 / total_count_geoid10,
    SumTPFSN = sum_tpf_sn_c_1,
    TotalTPF = total_sum_tpf,
    FractionTPF = sum_tpf_sn_c_1 / total_sum_tpf
  )
}

# Input tables and the label each one carries in the summary (same order)
data_frames <- list(
  PrincipalPortsIntersect,
  PrincipalPortsOneMile,
  SubsetAirportsIntersect,
  SubsetAirportsOneMile
)
data_frame_names <- c(
  "Principal Seaports (Direct Intersection)",
  "Principal Seaports (1 Mile Radius)",
  "International Airports (Direct Intersection)",
  "International Airports (1 Mile Radius)"
)

# One summary row per table, stacked left to right onto an empty data frame
metric_rows <- Map(calculate_metrics_df, data_frames, data_frame_names)
results_table <- Reduce(rbind, metric_rows, data.frame())

# Replace the internal column names with presentation headings
colnames(results_table) <- c(
  "Data",
  "Disadvantaged Ports",
  "All Ports",
  "Fraction of Disadvantaged Ports",
  "Disadvantaged Census Tracts",
  "All Census Tracts",
  "Fraction of Disadvantaged Census Tracts",
  "Disadvantaged Population",
  "Total Population",
  "Fraction of Disadvantaged Population"
)

# Show the combined table
print(results_table)

# Export the table as CSV.
# Set folder_path to the destination directory. When it is left empty the file
# is written to the current working directory; pasting "/results_table.csv"
# onto an empty prefix would instead point at the filesystem root.
folder_path <- ""

file_path <- if (nzchar(folder_path)) {
  file.path(folder_path, "results_table.csv")
} else {
  "results_table.csv"
}

write.csv(results_table, file_path, row.names = FALSE)
